Please visit https://github.com/arifpras/KelasData for the datasets and other relevant materials to be used during the sessions.
rm(list=ls())
ls()## character(0)
#setting working directory
setwd("/Users/arifpras/OneDrive - The University of Nottingham/BB_KelasData/KelasData/00_Datasets")op_sales <- readxl::read_excel(path = "/Users/arifpras/OneDrive - The University of Nottingham/BB_KelasData/KelasData/00_Datasets/OP_all.xlsx", sheet = "OP_sales")glimpse(op_sales)## Rows: 51
## Columns: 8
## $ volume <dbl> 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63…
## $ title <chr> "Arriving Once Again", "The 11 Supernovas", "Roger and…
## $ release_date <dttm> 2008-06-04, 2008-09-04, 2008-12-04, 2009-03-04, 2009-…
## $ pages <dbl> 216, 232, 216, 216, 216, 200, 216, 216, 216, 216, 216,…
## $ chapters <dbl> 10, 11, 10, 10, 10, 9, 10, 11, 11, 11, 10, 9, 11, 12, …
## $ sales_mio <dbl> 1.441785, 1.517953, 1.476194, 1.641952, 1.609125, 1.66…
## $ last_movie_days <dbl> 95, 187, 278, 368, 460, 552, 643, 82, 174, 235, 327, 4…
## $ vixi_avg <dbl> 23.60, 39.95, 48.06, 42.48, 28.58, 24.40, 20.15, 17.16…
#change the date format: from <POSITX> to <date>
op_sales <- op_sales %>%
mutate(release_date = as.Date(release_date))
glimpse(op_sales)## Rows: 51
## Columns: 8
## $ volume <dbl> 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63…
## $ title <chr> "Arriving Once Again", "The 11 Supernovas", "Roger and…
## $ release_date <date> 2008-06-04, 2008-09-04, 2008-12-04, 2009-03-04, 2009-…
## $ pages <dbl> 216, 232, 216, 216, 216, 200, 216, 216, 216, 216, 216,…
## $ chapters <dbl> 10, 11, 10, 10, 10, 9, 10, 11, 11, 11, 10, 9, 11, 12, …
## $ sales_mio <dbl> 1.441785, 1.517953, 1.476194, 1.641952, 1.609125, 1.66…
## $ last_movie_days <dbl> 95, 187, 278, 368, 460, 552, 643, 82, 174, 235, 327, 4…
## $ vixi_avg <dbl> 23.60, 39.95, 48.06, 42.48, 28.58, 24.40, 20.15, 17.16…
(
db01 <- op_sales %>%
pivot_longer(
cols = pages:vixi_avg,
names_to = "obsvar",
values_to = "obsval"
) %>%
mutate(
obsvar = recode(
obsvar,
chapters = "Chapters",
last_movie_days = "Days from Last Movie",
pages = "Pages",
sales_mio = "Total Sales (in millions)",
vixi_avg = "VIX Index (in average)"
)
)
)(
plot01 <- db01 %>%
#plot the dataset
ggplot(aes(x = volume, y = obsval)) +
geom_line(aes(color = obsvar)) +
facet_wrap( ~ obsvar, scales = "free", nrow = 2) +
theme_light() +
labs(x = "\nVolume", y = "") +
#ggthemes::scale_color_colorblind() +
scale_color_brewer(palette = "Set1") +
theme(
axis.text.x = element_text(size = 8),
axis.ticks.x = element_blank(),
axis.line.x = element_blank(),
axis.title.x = element_text(size = 7),
axis.line.y = element_blank(),
axis.title.y = element_text(size = 7),
axis.text.y = element_text(size = 8),
plot.title.position = "plot",
plot.caption.position = "plot",
legend.title = element_text(size = 7),
legend.text = element_text(size = 7),
legend.key.size = unit(0.5, "cm"),
legend.spacing = unit(1, "cm"),
plot.title = element_text(hjust = 0, size = 13, face = "bold"),
plot.subtitle = element_text(hjust = 0, size = 12),
plot.caption = element_text(hjust = 0, size = 12),
# legend.title = element_blank(),
legend.position = "bottom",
strip.text.x = element_text(size = 8),
panel.grid.major.y = element_line(colour = "grey97"),
panel.ontop = FALSE,
panel.spacing = unit(1, "lines")
) +
guides(color = guide_legend(title = "Observable variables:"))
)pacman::p_load(colorspace)
(
plot02 <- db01 %>%
#plot the dataset
ggplot(aes(x = volume, y = obsval)) +
geom_boxplot(aes(
color = obsvar,
fill = after_scale(desaturate(lighten(color, .7), .7))
),
size = 1) +
#geom_line(aes(color = obsvar)) +
facet_wrap(~ obsvar, scales = "free", nrow = 2) +
theme_light() +
labs(
title = "Boxplot",
x = "\nVolume", y = "") +
#ggthemes::scale_color_colorblind() +
scale_color_brewer(palette = "Set1") +
theme(
axis.text.x = element_text(size = 8),
axis.ticks.x = element_blank(),
axis.line.x = element_blank(),
axis.title.x = element_text(size = 7),
axis.line.y = element_blank(),
axis.title.y = element_text(size = 7),
axis.text.y = element_text(size = 8),
plot.title.position = "plot",
plot.caption.position = "plot",
legend.title = element_text(size = 7),
legend.text = element_text(size = 7),
legend.key.size = unit(0.5, "cm"),
legend.spacing = unit(1, "cm"),
plot.title = element_text(hjust = 0, size = 16, face = "bold"),
plot.subtitle = element_text(hjust = 0, size = 12),
plot.caption = element_text(hjust = 0, size = 12),
# legend.title = element_blank(),
legend.position = "bottom",
strip.text.x = element_text(size = 8),
panel.grid.major.y = element_line(colour = "grey97"),
panel.ontop = FALSE,
panel.spacing = unit(1, "lines")
) +
guides(color = guide_legend(title = "Observable variables:"))
)#pacman::p_load(colorspace)
(
plot03 <- db01 %>%
#plot the dataset
ggplot(aes(x = obsval)) +
geom_histogram(bins = 50,
aes(
color = obsvar,
fill = after_scale(desaturate(lighten(color, .7), .7))
)) +
facet_wrap(~ obsvar, scales = "free", nrow = 2) +
theme_light() +
labs(
title = "Histogram",
x = "\nRelease Date", y = "") +
#ggthemes::scale_color_colorblind() +
scale_color_brewer(palette = "Set1") +
theme(
axis.text.x = element_text(size = 8),
axis.ticks.x = element_blank(),
axis.line.x = element_blank(),
axis.title.x = element_text(size = 7),
axis.line.y = element_blank(),
axis.title.y = element_text(size = 7),
axis.text.y = element_text(size = 8),
plot.title.position = "plot",
plot.caption.position = "plot",
legend.title = element_text(size = 7),
legend.text = element_text(size = 7),
legend.key.size = unit(0.5, "cm"),
legend.spacing = unit(1, "cm"),
plot.title = element_text(hjust = 0, size = 16, face = "bold"),
plot.subtitle = element_text(hjust = 0, size = 12),
plot.caption = element_text(hjust = 0, size = 12),
# legend.title = element_blank(),
legend.position = "bottom",
strip.text.x = element_text(size = 8),
panel.grid.major.y = element_line(colour = "grey97"),
panel.ontop = FALSE,
panel.spacing = unit(1, "lines")
) +
guides(color = guide_legend(title = "Observable variables:"))
)op_sales %>% select(release_date, pages, chapters, sales_mio, last_movie_days, vixi_avg) %>%
skimr::skim()| Name | Piped data |
| Number of rows | 51 |
| Number of columns | 6 |
| _______________________ | |
| Column type frequency: | |
| Date | 1 |
| numeric | 5 |
| ________________________ | |
| Group variables | None |
Variable type: Date
| skim_variable | n_missing | complete_rate | min | max | median | n_unique |
|---|---|---|---|---|---|---|
| release_date | 0 | 1 | 2008-06-04 | 2021-09-03 | 2014-09-04 | 51 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| pages | 0 | 1 | 214.90 | 11.09 | 192.00 | 208.00 | 216.00 | 216.00 | 248.00 | ▃▂▇▂▁ |
| chapters | 0 | 1 | 10.47 | 0.67 | 9.00 | 10.00 | 10.00 | 11.00 | 12.00 | ▁▇▁▆▁ |
| sales_mio | 0 | 1 | 2.22 | 0.45 | 1.44 | 1.78 | 2.33 | 2.61 | 2.90 | ▇▅▃▇▇ |
| last_movie_days | 0 | 1 | 485.86 | 326.63 | 44.00 | 230.00 | 419.00 | 673.00 | 1297.00 | ▇▆▅▂▂ |
| vixi_avg | 0 | 1 | 19.90 | 8.57 | 10.55 | 14.14 | 16.77 | 23.10 | 48.06 | ▇▃▁▁▁ |
pacman::p_load(pastecs, stargazer)
(
summ01 <- op_sales %>%
select(pages:vixi_avg) %>%
rename("Chapters" = chapters,
"Days from Last Movie" = last_movie_days,
"Pages" = pages,
"Total Sales (in millions)" = sales_mio,
"VIX Index (in average)" = vixi_avg)
)summ01 %>%
as.data.frame() %>%
stargazer(type = 'text', out = "descsumm.txt", digits = 1) #flip = TRUE##
## ========================================================================
## Statistic N Mean St. Dev. Min Pctl(25) Pctl(75) Max
## ------------------------------------------------------------------------
## Pages 51 214.9 11.1 192 208 216 248
## Chapters 51 10.5 0.7 9 10 11 12
## Total Sales (in millions) 51 2.2 0.4 1.4 1.8 2.6 2.9
## Days from Last Movie 51 485.9 326.6 44 230 673 1,297
## VIX Index (in average) 51 19.9 8.6 10.5 14.1 23.1 48.1
## ------------------------------------------------------------------------
(
summ02 <- db01 %>%
group_by(obsvar) %>%
summarise(
Obs = n(),
Mean = mean(obsval, na.rm = TRUE),
Std.Dev = sd(obsval, na.rm = TRUE),
Min. = min(obsval, na.rm = TRUE),
Max. = max(obsval, na.rm = TRUE),
P25 = quantile(obsval, 0.25, na.rm = TRUE),
P75 = quantile(obsval, 0.75, na.rm = TRUE),
Var. = var(obsval, na.rm = TRUE)
)
)pacman::p_load(parameters)
(
summ03 <- op_sales %>%
select(pages:vixi_avg) %>%
rename("Chapters" = chapters,
"Days from Last Movie" = last_movie_days,
"Pages" = pages,
"Total Sales (in millions)" = sales_mio,
"VIX Index (in average)" = vixi_avg) %>%
describe_distribution() %>%
t()
)## [,1] [,2] [,3]
## Variable "Pages" "Chapters" "Total Sales (in millions)"
## Mean "214.901961" " 10.470588" " 2.220214"
## SD " 11.0873891" " 0.6738825" " 0.4455385"
## IQR " 8.000000" " 1.000000" " 0.850304"
## Min "192.000000" " 9.000000" " 1.441785"
## Max " 248.000000" " 12.000000" " 2.901059"
## Skewness " 0.3962788" " 0.3149269" "-0.2057600"
## Kurtosis " 0.5150045" "-0.0597263" "-1.3611366"
## n "51" "51" "51"
## n_Missing "0" "0" "0"
## [,4] [,5]
## Variable "Days from Last Movie" "VIX Index (in average)"
## Mean "485.862745" " 19.899804"
## SD "326.6287813" " 8.5725010"
## IQR "451.000000" " 9.290000"
## Min " 44.000000" " 10.550000"
## Max "1297.000000" " 48.060000"
## Skewness " 0.6986392" " 1.5833807"
## Kurtosis "-0.3056124" " 2.1995930"
## n "51" "51"
## n_Missing "0" "0"
library(corrr)##
## Attaching package: 'corrr'
## The following object is masked from 'package:skimr':
##
## focus
(
corr01 <- op_sales %>%
select(pages:vixi_avg) %>%
rename("Chapters" = chapters,
"Days from Last Movie" = last_movie_days,
"Pages" = pages,
"Total Sales (in millions)" = sales_mio,
"VIX Index (in average)" = vixi_avg) %>%
correlate() %>%
shave() %>%
fashion()
)##
## Correlation method: 'pearson'
## Missing treated using: 'pairwise.complete.obs'
ols01bs <- lm(sales_mio ~ chapters + last_movie_days + vixi_avg,
data = op_sales)
summary(ols01bs)##
## Call:
## lm(formula = sales_mio ~ chapters + last_movie_days + vixi_avg,
## data = op_sales)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.74522 -0.31147 -0.04718 0.34711 0.76190
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.6107927 0.9671887 1.665 0.10248
## chapters 0.1028203 0.0879605 1.169 0.24832
## last_movie_days -0.0001212 0.0001866 -0.649 0.51928
## vixi_avg -0.0205174 0.0071327 -2.877 0.00603 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4149 on 47 degrees of freedom
## Multiple R-squared: 0.1847, Adjusted R-squared: 0.1326
## F-statistic: 3.548 on 3 and 47 DF, p-value: 0.02135
ols02bs <- lm(sales_mio ~ pages + last_movie_days + vixi_avg,
data = op_sales)
summary(ols02bs)##
## Call:
## lm(formula = sales_mio ~ pages + last_movie_days + vixi_avg,
## data = op_sales)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.73780 -0.24534 -0.03507 0.26933 0.70463
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.963e-01 1.099e+00 -0.451 0.65377
## pages 1.481e-02 4.997e-03 2.964 0.00475 **
## last_movie_days -6.218e-05 1.750e-04 -0.355 0.72402
## vixi_avg -2.194e-02 6.591e-03 -3.329 0.00170 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3864 on 47 degrees of freedom
## Multiple R-squared: 0.2931, Adjusted R-squared: 0.248
## F-statistic: 6.497 on 3 and 47 DF, p-value: 0.0009077
pacman::p_load(broom)
(
ols01dp <- op_sales %>%
select(pages:vixi_avg) %>%
lm(sales_mio ~ chapters + last_movie_days + vixi_avg,
data = .) %>%
tidy()
)(
ols02dp <- op_sales %>%
select(pages:vixi_avg) %>%
lm(sales_mio ~ pages + last_movie_days + vixi_avg,
data = .) %>%
tidy()
)pacman::p_load(estimatr)
(
ols01es <- op_sales %>%
lm_robust(sales_mio ~ chapters + last_movie_days + vixi_avg,
data = . ,se_type = "stata") %>%
tidy()
)pacman::p_load(estimatr)
(
ols02es <- op_sales %>%
lm_robust(sales_mio ~ pages + last_movie_days + vixi_avg,
data = . ,se_type = "stata") %>%
tidy()
)stargazer(ols01bs, ols02bs, type = "text", out = "ols_base.txt")##
## ==========================================================
## Dependent variable:
## ----------------------------
## sales_mio
## (1) (2)
## ----------------------------------------------------------
## chapters 0.103
## (0.088)
##
## pages 0.015***
## (0.005)
##
## last_movie_days -0.0001 -0.0001
## (0.0002) (0.0002)
##
## vixi_avg -0.021*** -0.022***
## (0.007) (0.007)
##
## Constant 1.611 -0.496
## (0.967) (1.099)
##
## ----------------------------------------------------------
## Observations 51 51
## R2 0.185 0.293
## Adjusted R2 0.133 0.248
## Residual Std. Error (df = 47) 0.415 0.386
## F Statistic (df = 3; 47) 3.548** 6.497***
## ==========================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
pacman::p_load(texreg)
screenreg(list(ols01bs, ols02bs), digits = 2)##
## ===================================
## Model 1 Model 2
## -----------------------------------
## (Intercept) 1.61 -0.50
## (0.97) (1.10)
## chapters 0.10
## (0.09)
## last_movie_days -0.00 -0.00
## (0.00) (0.00)
## vixi_avg -0.02 ** -0.02 **
## (0.01) (0.01)
## pages 0.01 **
## (0.00)
## -----------------------------------
## R^2 0.18 0.29
## Adj. R^2 0.13 0.25
## Num. obs. 51 51
## ===================================
## *** p < 0.001; ** p < 0.01; * p < 0.05
#knitreg(list(ols01bs, ols02bs), digits = 2)
#wordreg(list(ols01bs, ols02bs), digits = 2, file = "ols_reg.doc")
plotreg(list(ols01bs, ols02bs), digits = 2)## Models: bars denote 0.5 (inner) resp. 0.95 (outer) confidence intervals (computed from standard errors).
lm_robust & wordreg#doesn't work: lm_robust & stargazer & texreg
ols03es <- lm_robust(sales_mio ~ chapters + last_movie_days + vixi_avg,
data = op_sales, se_type = "stata")
ols04es <- lm_robust(sales_mio ~ pages + last_movie_days + vixi_avg,
data = op_sales, se_type = "stata")
#stargazer(ols03es, ols04es, type = "text", out = "ols_estimatr.txt")
#wordreg(list(ols03es, ols04es), digits = 2, file = "ols_estimatr.doc")#source: http://www.strengejacke.de/sjPlot/index.html
pacman::p_load(sjPlot, sjmisc, sjlabelled)
theme_set(theme_test())
plot_model(ols01bs,
title = "Sales (in mio)",
show.values = TRUE,
type = "est",
digits = 4) +
#edit with ggplot2
scale_y_continuous(limits = c(-0.12,0.12))## Warning: Argument 'df_method' is deprecated. Please use 'ci_method' instead.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
plot_model(ols02bs,
title = "Sales (in mio)",
show.values = TRUE,
type = "est",
digits = 4) +
#edit with ggplot2
scale_y_continuous(limits = c(-0.12,0.12))## Warning: Argument 'df_method' is deprecated. Please use 'ci_method' instead.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
plot_models(ols01bs, ols02bs,
title = "Sales (in mio)",
show.values = TRUE,
show.intercept = FALSE,
line.size = 0.1,
p.shape = TRUE,
value.size = 3,
digits = 4) +
#edit with ggplot2
scale_y_continuous(limits = c(-0.12,0.12)) +
theme(
axis.text.x = element_text(size = 8),
axis.ticks.x = element_blank(),
axis.line.x = element_blank(),
axis.title.x = element_text(size = 8),
axis.line.y = element_blank(),
axis.title.y = element_text(size = 8),
axis.text.y = element_text(size = 8),
plot.title.position = "plot",
plot.caption.position = "plot",
legend.title = element_text(size = 8),
legend.text = element_text(size = 8),
legend.key.size = unit(0.5, "cm"),
legend.spacing = unit(1, "cm"),
plot.title = element_text(hjust = 0, size = 16, face = "bold"),
plot.subtitle = element_text(hjust = 0, size = 12),
plot.caption = element_text(hjust = 0, size = 12),
# legend.title = element_blank(),
legend.position = "bottom",
strip.text.x = element_text(size = 8),
panel.grid.major.y = element_line(colour = "grey97"),
panel.ontop = FALSE,
panel.spacing = unit(1, "lines")
)## Warning: Argument 'df_method' is deprecated. Please use 'ci_method' instead.
## Warning: Argument 'df_method' is deprecated. Please use 'ci_method' instead.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
forestmodellibrary(forestmodel)
forest_model(ols01bs, theme = theme_forest())forest_model(ols02bs, theme = theme_forest())gl01bs <- broom::glance(ols01bs) %>% t() %>% round(digits = 2)
gl02bs <- broom::glance(ols02bs) %>% t() %>% round(digits = 2)
cbind(gl01bs, gl02bs)## [,1] [,2]
## r.squared 0.18 0.29
## adj.r.squared 0.13 0.25
## sigma 0.41 0.39
## statistic 3.55 6.50
## p.value 0.02 0.00
## df 3.00 3.00
## logLik -25.42 -21.78
## AIC 60.85 53.57
## BIC 70.51 63.22
## deviance 8.09 7.02
## df.residual 47.00 47.00
## nobs 51.00 51.00
pacman::p_load(patchwork, performance)
(
ch01 <- check_model(ols01bs)
)(
ch02 <- check_model(ols02bs)
)#install.packages("performance", repos = "https://easystats.r-universe.dev")
library(performance)
library(see)
(
comp <- compare_performance(ols01bs, ols02bs)
)plot(comp)#clearing the environment
ls()## [1] "ch01" "ch02" "comp" "corr01" "db01" "gl01bs"
## [7] "gl02bs" "ols01bs" "ols01dp" "ols01es" "ols02bs" "ols02dp"
## [13] "ols02es" "ols03es" "ols04es" "op_sales" "plot01" "plot02"
## [19] "plot03" "summ01" "summ02" "summ03"
rm(list=ls())Contact me at ap.sulistiono@gmail.com